package com.shujia.core

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

object Demo4Filter {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("map算子演示")
    val context = new SparkContext(conf)
    //====================================================
    val studentRDD: RDD[String] = context.textFile("spark/data/students.csv")

    /**
     *  filter: 过滤，将RDD中的数据一条一条取出传递给filter后面的函数，如果函数的结果是true，该条数据就保留，否则丢弃
     *
     *  filter一般情况下会减少数据的条数
     */
    val filterRDD: RDD[String] = studentRDD.filter((s: String) => {
      val strings: Array[String] = s.split(",")
      "男".equals(strings(3))
    })


    filterRDD.foreach(println)



  }
}
